{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as ticker\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import unicodedata\n",
    "import re\n",
    "import numpy as np\n",
    "import os\n",
    "import io\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = './datasets/cmn-eng/cmn.txt'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 一、文本预处理：中英文不同"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess_eng(w):\n",
    "    w = w.lower().strip()\n",
    "\n",
    "    # 在单词与跟在其后的标点符号之间插入一个空格\n",
    "    # 例如： \"he is a boy.\" => \"he is a boy .\"\n",
    "    w = re.sub(r\"([?.!,¿])\", r\" \\1 \", w)\n",
    "    w = re.sub(r'[\" \"]+', \" \", w)\n",
    "\n",
    "    # 除了 (a-z, A-Z, \".\", \"?\", \"!\", \",\")，将所有字符替换为空格\n",
    "    w = re.sub(r\"[^a-zA-Z?.!,¿]+\", \" \", w)\n",
    "\n",
    "    w = w.rstrip().strip()\n",
    "\n",
    "    # 给句子加上开始和结束标记，以便模型知道何时开始和结束预测\n",
    "    w = '<start> ' + w + ' <end>'\n",
    "    return w\n",
    "\n",
    "\n",
    "def preprocess_ch(w):\n",
    "    w = zhconv.convert(w, 'zh-cn')\n",
    "    w = ' '.join(jieba.cut(w))\n",
    "    w = w.rstrip().strip()\n",
    "    w = '<start> ' + w + ' <end>'\n",
    "    return w\n",
    "\n",
    "\n",
    "def create_dataset(path, num_examples):\n",
    "    lines = io.open(path, encoding='utf-8').read().strip().split('\\n')\n",
    "    eng, ch = [], []\n",
    "    for l in lines[:num_examples]:\n",
    "        word_pairs = l.split('\\t')\n",
    "        eng.append(preprocess_eng(word_pairs[0]))\n",
    "        ch.append(preprocess_ch(word_pairs[1]))\n",
    "    return [eng, ch]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "eng, ch = create_dataset(path, None)\n",
    "print(eng[-1])\n",
    "print(ch[-1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 二、文本向量化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def max_length(tensor):\n",
    "    return max(len(t) for t in tensor)\n",
    "\n",
    "\n",
    "def tokenize(lang):\n",
    "    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters=' ')\n",
    "    lang_tokenizer.fit_on_texts(lang)\n",
    "    tensor = lang_tokenizer.texts_to_sequences(lang)\n",
    "    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,\n",
    "                                                           padding='post')\n",
    "    return tensor, lang_tokenizer\n",
    "\n",
    "\n",
    "def load_dataset(path, num_examples=None):\n",
    "    eng, ch = create_dataset(path, num_examples)\n",
    "    ch_tensor, ch_tokenizer = tokenize(ch)\n",
    "    eng_tensor, eng_tokenizer = tokenize(eng)\n",
    "    return ch_tensor, eng_tensor, ch_tokenizer, eng_tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_examples = None\n",
    "ch_tensor, eng_tensor, ch_tokenizer, eng_tokenizer = load_dataset(\n",
    "    path, num_examples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存分词器，模型复用\n",
    "# tokenizer_json = ch_tokenizer.to_json()\n",
    "# with io.open('ch_tokenizer.json', 'w', encoding='utf-8') as f:\n",
    "#     f.write(json.dumps(tokenizer_json, ensure_ascii=False))\n",
    "#\n",
    "# with open('ch_tokenizer.json') as f:\n",
    "#     data = json.load(f)\n",
    "#     tokenizer = tokenizer_from_json(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_length_ch, max_length_eng = max_length(ch_tensor), max_length(eng_tensor)\n",
    "\n",
    "ch_tensor_train, ch_tensor_val, eng_tensor_train, eng_tensor_val = train_test_split(\n",
    "    ch_tensor, eng_tensor, test_size=0.2)\n",
    "\n",
    "print(len(ch_tensor_train), len(ch_tensor_val), len(eng_tensor_train),\n",
    "      len(eng_tensor_val))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert(tokenizer, tensor):\n",
    "    for t in tensor:\n",
    "        if t != 0:\n",
    "            print(\"%d ----> %s\" % (t, tokenizer.index_word[t]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Input Language; index to word mapping\")\n",
    "convert(ch_tokenizer, ch_tensor_train[0])\n",
    "print()\n",
    "print(\"Target Language; index to word mapping\")\n",
    "convert(eng_tokenizer, eng_tensor_train[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 三、创建 tf.data 数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BUFFER_SIZE = len(ch_tensor_train)\n",
    "BATCH_SIZE = 64\n",
    "steps_per_epoch = len(ch_tensor_train) // BATCH_SIZE\n",
    "embedding_size = 256\n",
    "units = 1024\n",
    "vocab_inp_size = len(ch_tokenizer.word_index) + 1\n",
    "vocab_targ_size = len(eng_tokenizer.word_index) + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = tf.data.Dataset.from_tensor_slices(\n",
    "    (ch_tensor_train, eng_tensor_train)).shuffle(BUFFER_SIZE)\n",
    "dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "example_inp_batch, example_targ_batch = next(iter(dataset))\n",
    "print(example_inp_batch.shape, example_targ_batch.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 四、编码器和解码器模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. 编码器结构：\n",
    "\n",
    "# 输入的中文数据，[batch_size, max_length_inp]\n",
    "# ---> Embedding 层  ---> [batch_size,max_length_inp, embedding_dim]\n",
    "# ---> GRU 层 ---> 输出状态[batch_size, max_length_inp, enc_units]， 隐藏状态 [\n",
    "# batch_size,  enc_units]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Encoder(tf.keras.Model):\n",
    "    def __init__(self, vocab_size, embedding_dim, enc_units, batch_size):\n",
    "        super(Encoder, self).__init__()\n",
    "        self.batch_size = batch_size\n",
    "        self.enc_units = enc_units\n",
    "        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n",
    "        self.gru = tf.keras.layers.GRU(self.enc_units,\n",
    "                                       return_sequences=True,\n",
    "                                       return_state=True,\n",
    "                                       recurrent_initializer='glorot_uniform')\n",
    "\n",
    "    def call(self, x, hidden):\n",
    "        x = self.embedding(x)\n",
    "        output, state = self.gru(x, initial_state=hidden)\n",
    "        return output, state\n",
    "\n",
    "    def initialize_hidden_state(self):\n",
    "        return tf.zeros((self.batch_size, self.enc_units))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "encoder = Encoder(vocab_inp_size, embedding_size, units, BATCH_SIZE)\n",
    "sample_hidden = encoder.initialize_hidden_state()\n",
    "sample_output, sample_hidden = encoder(example_inp_batch, sample_hidden)\n",
    "print('Encoder output shape: (batch size, sequence length, units) {}'.format(\n",
    "    sample_output.shape))\n",
    "print('Encoder Hidden state shape: (batch size, units) {}'.format(\n",
    "    sample_hidden.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. 注意力机制\n",
    "# values 为上一步的输出 [batch_size, max_length_inp, enc_units]\n",
    "# query 为上一步的隐藏状态 [batch_size, enc_units]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class BahdanauAttention(tf.keras.layers.Layer):\n",
    "    def __init__(self, units):\n",
    "        super(BahdanauAttention, self).__init__()\n",
    "        self.W1 = tf.keras.layers.Dense(units)\n",
    "        self.W2 = tf.keras.layers.Dense(units)\n",
    "        self.V = tf.keras.layers.Dense(1)\n",
    "\n",
    "    def call(self, query, values):\n",
    "        hidden_with_time_axis = tf.expand_dims(query, 1)\n",
    "        # [batch_size, 1, enc_units]\n",
    "\n",
    "        score = self.V(\n",
    "            tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))\n",
    "        # [batch_size, mex_length_inp, 1]\n",
    "\n",
    "        attention_weights = tf.nn.softmax(score, axis=1)\n",
    "        # [batch_size, mex_length_inp, 1]\n",
    "\n",
    "        context_vector = attention_weights * values\n",
    "        # [batch_size, max_length_inp, enc_units]\n",
    "\n",
    "        context_vector = tf.reduce_sum(context_vector, axis=1)\n",
    "        # [batch_size, enc_units]\n",
    "\n",
    "        return context_vector, attention_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "attention_layer = BahdanauAttention(10)\n",
    "attention_result, attention_weights = attention_layer(sample_hidden,\n",
    "                                                      sample_output)\n",
    "\n",
    "print(\"Attention result shape: (batch size, units) {}\".format(\n",
    "    attention_result.shape))\n",
    "print(\"Attention weights shape: (batch_size, sequence_length, 1) {}\".format(\n",
    "    attention_weights.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. 解码器结构：\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Decoder(tf.keras.Model):\n",
    "    def __init__(self, vocab_size, embedding_dim, dec_units, batch_size):\n",
    "        super(Decoder, self).__init__()\n",
    "        self.batch_size = batch_size\n",
    "        self.dec_units = dec_units\n",
    "        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n",
    "        self.gru = tf.keras.layers.GRU(self.dec_units,\n",
    "                                       return_sequences=True,\n",
    "                                       return_state=True,\n",
    "                                       recurrent_initializer='glorot_uniform')\n",
    "        self.fc = tf.keras.layers.Dense(vocab_size)\n",
    "        self.attention = BahdanauAttention(self.dec_units)\n",
    "\n",
    "    def call(self, x, hidden, enc_output):\n",
    "        # 逐个单词进行翻译\n",
    "        # x: [batch_size, 1],\n",
    "        # hidden: [batch_size, units]\n",
    "        # enc_output: [batch_size, max_length_inp, enc_units]\n",
    "\n",
    "        context_vector, attention_weights = self.attention(hidden, enc_output)\n",
    "        # [batch_size, enc_units]，[batch_size, max_length_inp, 1]\n",
    "\n",
    "        x = self.embedding(x)\n",
    "        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)\n",
    "        # [batch_size, 1, embedding_dim+enc_units]\n",
    "\n",
    "        output, state = self.gru(x)\n",
    "        output = tf.reshape(output, (-1, output.shape[2]))\n",
    "        x = self.fc(output)\n",
    "\n",
    "        return x, state, attention_weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "decoder = Decoder(vocab_targ_size, embedding_size, units, BATCH_SIZE)\n",
    "sample_decoder_output, _, _ = decoder(tf.random.uniform((64, 1)),\n",
    "                                      sample_hidden, sample_output)\n",
    "print('Decoder output shape: (batch_size, vocab size) {}'.format(\n",
    "    sample_decoder_output.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. 优化器及损失函数："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = tf.keras.optimizers.Adam()\n",
    "loss_objects = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,\n",
    "                                                             reduction='none')\n",
    "\n",
    "\n",
    "def loss_function(real, pred):\n",
    "    mask = tf.math.logical_not(tf.math.equal(real, 0))\n",
    "    # 所有非零处为 True，零处 False\n",
    "    # 数据向量化时都是在后部补 0 达到相同的长度\n",
    "\n",
    "    loss_ = loss_objects(real, pred)\n",
    "\n",
    "    mask = tf.cast(mask, dtype=loss_.dtype)\n",
    "    loss_ *= mask\n",
    "\n",
    "    return tf.reduce_mean(loss_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. 训练模型："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "checkpoint_dir = '../H/save/zh2eng_attention'\n",
    "# checkpoint_prefix = os.path.join(checkpoint_dir, \"cpkt\")\n",
    "checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder,\n",
    "                                 decoder=decoder)\n",
    "\n",
    "manager = tf.train.CheckpointManager(checkpoint, directory=checkpoint_dir,\n",
    "                                     checkpoint_name='model.ckpt',\n",
    "                                     max_to_keep=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@tf.function\n",
    "def train_step(inp, targ, enc_hidden):\n",
    "    loss = 0\n",
    "    with tf.GradientTape() as tape:\n",
    "        enc_output, enc_hidden = encoder(inp, enc_hidden)\n",
    "        dec_hidden = enc_hidden\n",
    "        dec_input = tf.expand_dims([eng_tokenizer.word_index['<start>']] *\n",
    "                                   BATCH_SIZE, 1)\n",
    "\n",
    "        for t in range(1, targ.shape[1]):\n",
    "            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden,\n",
    "                                                 enc_output)\n",
    "            loss += loss_function(targ[:, t], predictions)\n",
    "            dec_input = tf.expand_dims(targ[:, t], 1)\n",
    "\n",
    "    batch_loss = (loss / int(targ.shape[1]))\n",
    "    variables = encoder.trainable_variables + decoder.trainable_variables\n",
    "    gradients = tape.gradient(loss, variables)\n",
    "    optimizer.apply_gradients(grads_and_vars=zip(gradients, variables))\n",
    "    return batch_loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "EPOCHS = 10\n",
    "for epoch in range(EPOCHS):\n",
    "    start = time.time()\n",
    "\n",
    "    enc_hidden = encoder.initialize_hidden_state()\n",
    "    total_loss = 0\n",
    "\n",
    "    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):\n",
    "        batch_loss = train_step(inp, targ, enc_hidden)\n",
    "        total_loss += batch_loss\n",
    "\n",
    "        if batch % 100 == 0:\n",
    "            print(\"Epoch {} Batch {} Loss {:4f}\".format(\n",
    "                epoch + 1, batch, batch_loss.numpy()))\n",
    "\n",
    "        if (epoch + 1) % 2 == 0:\n",
    "            # checkpoint.save(file_prefix=checkpoint_prefix)\n",
    "\n",
    "            manager.save()\n",
    "\n",
    "        print('Epoch {} Loss {:.4f}'.format(epoch + 1,\n",
    "                                            total_loss / steps_per_epoch))\n",
    "        print('Time taken for 1 epoch {} sec\\n'.format(time.time() - start))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate(sentence):\n",
    "    attention_plot = np.zeros((max_length_eng, max_length_ch))\n",
    "\n",
    "    sentence = preprocess_ch(sentence)\n",
    "    inputs = [ch_tokenizer.word_index[i] for i in sentence.split(' ')]\n",
    "    inputs = tf.keras.preprocessing.sequence.pad_sequences(\n",
    "        [inputs], maxlen=max_length_ch, padding='post')\n",
    "    inputs = tf.convert_to_tensor(inputs)\n",
    "\n",
    "    result = ''\n",
    "    hidden = [tf.zeros((1, units))]\n",
    "    enc_out, enc_hidden = encoder(inputs, hidden)\n",
    "\n",
    "    dec_hidden = enc_hidden\n",
    "    dec_input = tf.expand_dims([eng_tokenizer.word_index['<start>']], 0)\n",
    "    for t in range(max_length_eng):\n",
    "        predictions, dec_hidden, attention_weights = decoder(\n",
    "            dec_input, dec_hidden, enc_out)\n",
    "        attention_weights = tf.reshape(attention_weights, (-1, ))\n",
    "        attention_plot[t] = attention_weights.numpy()\n",
    "        predicted_id = tf.argmax(predictions[0]).numpy()\n",
    "        result += eng_tokenizer.index_word[predicted_id] + ' '\n",
    "\n",
    "        if eng_tokenizer.index_word[predicted_id] == '<end>':\n",
    "            return result, sentence, attention_plot\n",
    "\n",
    "        dec_input = tf.expand_dims([predicted_id], 0)\n",
    "\n",
    "    return result, sentence, attention_plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_attention(attention, sentence, predicted_sentence):\n",
    "    fig = plt.figure(figsize=(10, 10))\n",
    "    ax = fig.add_subplot(1, 1, 1)\n",
    "    ax.matshow(attention, cmap='viridis')\n",
    "\n",
    "    fontdict = {'fontsize': 14}\n",
    "    ax.set_xticklabels([''] + sentence, fontdict=fontdict)\n",
    "    ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)\n",
    "    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))\n",
    "    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def translate(sentence):\n",
    "    result, sentence, attention_plot = evaluate(sentence)\n",
    "\n",
    "    print(\"Input : %s\" % (sentence))\n",
    "    print(\"Predicted translation : {}\".format(result))\n",
    "    attention_plot = attention_plot[:len(result.split(' ')\n",
    "                                         ), :len(sentence.split(' '))]\n",
    "    plot_attention(attention_plot, sentence.split(' '), result.split(' '))\n",
    "\n",
    "\n",
    "checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))\n",
    "\n",
    "translate(u'我迷失了')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
